Some cudadrv tests #2684
Conversation
Your PR requires formatting changes to meet the project's style guidelines. The suggested changes:

diff --git a/src/texture.jl b/src/texture.jl
index fc1590423..bbdca57db 100644
--- a/src/texture.jl
+++ b/src/texture.jl
@@ -97,7 +97,7 @@ end
# idempotency
CuTextureArray{T,N}(xs::CuTextureArray{T,N}) where {T,N} = xs
-CuTextureArray(xs::CuTextureArray{T,N}) where {T,N} = xs
+CuTextureArray(xs::CuTextureArray{T, N}) where {T, N} = xs
CuTextureArray(A::AbstractArray{T,N}) where {T,N} = CuTextureArray{T,N}(A)
diff --git a/test/core/cudadrv.jl b/test/core/cudadrv.jl
index 2372cb568..7772cf9cb 100644
--- a/test/core/cudadrv.jl
+++ b/test/core/cudadrv.jl
@@ -435,15 +435,15 @@ nb = sizeof(data)
typed_pointer(buf::Union{CUDA.DeviceMemory, CUDA.UnifiedMemory}, T) = convert(CuPtr{T}, buf)
typed_pointer(buf::CUDA.HostMemory, T) = convert(Ptr{T}, buf)
-@testset "showing" begin
- for (Ty, str) in zip([CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory], ("DeviceMemory", "HostMemory", "UnifiedMemory"))
- dummy = CUDA.alloc(Ty, 0)
- @test startswith(sprint(show, dummy), str)
- CUDA.free(dummy)
+ @testset "showing" begin
+ for (Ty, str) in zip([CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory], ("DeviceMemory", "HostMemory", "UnifiedMemory"))
+ dummy = CUDA.alloc(Ty, 0)
+ @test startswith(sprint(show, dummy), str)
+ CUDA.free(dummy)
+ end
end
-end
-@testset "allocations and copies, src $srcTy dst $dstTy" for srcTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory],
+ @testset "allocations and copies, src $srcTy dst $dstTy" for srcTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory],
dstTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory]
dummy = CUDA.alloc(srcTy, 0)
@@ -479,7 +479,7 @@ end
# test device with context in which pointer was allocated.
@test device(typed_pointer(src, T)) == device()
- @test context(typed_pointer(src, T)) == context()
+ @test context(typed_pointer(src, T)) == context()
if !memory_pools_supported(device())
# NVIDIA bug #3319609
@test context(typed_pointer(src, T)) == context()
@@ -503,7 +503,7 @@ end
CUDA.free(dst)
end
-@testset "pointer attributes" begin
+ @testset "pointer attributes" begin
src = CUDA.alloc(CUDA.DeviceMemory, nb)
attribute!(typed_pointer(src, T), CUDA.POINTER_ATTRIBUTE_SYNC_MEMOPS, 0)
@@ -511,7 +511,7 @@ end
CUDA.free(src)
end
-@testset "asynchronous operations" begin
+ @testset "asynchronous operations" begin
src = CUDA.alloc(CUDA.DeviceMemory, nb)
unsafe_copyto!(typed_pointer(src, T), pointer(data), N; async=true)
@@ -521,7 +521,7 @@ end
CUDA.free(src)
end
-@testset "pinned memory" begin
+ @testset "pinned memory" begin
# create a pinned and mapped buffer
src = CUDA.alloc(CUDA.HostMemory, nb, CUDA.MEMHOSTALLOC_DEVICEMAP)
@@ -553,16 +553,16 @@ if attribute(device(), CUDA.DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED) != 0
CUDA.unregister(src)
- # with a RefValue
- src = Ref{T}(T(42))
- CUDA.pin(src)
- cpu_ptr = Base.unsafe_convert(Ptr{T}, src)
- ref = Array{T}(undef, 1)
- unsafe_copyto!(pointer(ref), cpu_ptr, 1)
- @test ref == [T(42)]
+ # with a RefValue
+ src = Ref{T}(T(42))
+ CUDA.pin(src)
+ cpu_ptr = Base.unsafe_convert(Ptr{T}, src)
+ ref = Array{T}(undef, 1)
+ unsafe_copyto!(pointer(ref), cpu_ptr, 1)
+ @test ref == [T(42)]
end
-@testset "unified memory" begin
+ @testset "unified memory" begin
src = CUDA.alloc(CUDA.UnifiedMemory, nb)
@test_throws BoundsError CUDA.prefetch(src, 2*nb; device=CUDA.DEVICE_CPU)
@@ -583,7 +583,7 @@ end
CUDA.free(src)
end
-@testset "3d memcpy" begin
+ @testset "3d memcpy" begin
# TODO: use cuMemAllocPitch (and put pitch in buffer?) to actually get benefit from this
 data = collect(reshape(1:27, 3, 3, 3))
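For reference, the testsets above revolve around the `typed_pointer` helper defined in the diff, which dispatches on the buffer type: device and unified buffers convert to a `CuPtr{T}`, while host buffers convert to a plain `Ptr{T}`. Below is a minimal sketch of the allocate/copy/free round trip these testsets exercise; the element type and host data are placeholders, not values from the test suite.

```julia
using CUDA

# Helper from the diff: device/unified memory yields a device pointer,
# host memory yields a plain CPU pointer.
typed_pointer(buf::Union{CUDA.DeviceMemory, CUDA.UnifiedMemory}, T) = convert(CuPtr{T}, buf)
typed_pointer(buf::CUDA.HostMemory, T) = convert(Ptr{T}, buf)

T = Int32                            # placeholder element type
data = T[1, 2, 3, 4]                 # placeholder host data
nb = sizeof(data)

src = CUDA.alloc(CUDA.DeviceMemory, nb)           # raw device allocation
unsafe_copyto!(typed_pointer(src, T), pointer(data), length(data))

ref = Array{T}(undef, length(data))               # copy back to the host
unsafe_copyto!(pointer(ref), typed_pointer(src, T), length(data))
@assert ref == data

CUDA.free(src)                       # raw allocations are freed manually
```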
Codecov Report
All modified and coverable lines are covered by tests ✅

@@            Coverage Diff             @@
##           master    #2684      +/-   ##
==========================================
+ Coverage   82.57%   82.86%    +0.28%
==========================================
  Files         153      153
  Lines       13606    13606
==========================================
+ Hits        11235    11274       +39
+ Misses       2371     2332       -39

☔ View full report in Codecov by Sentry.
CUDA.jl Benchmarks
Benchmark suite | Current: dbb2215 | Previous: 6bf72dd | Ratio |
---|---|---|---|
latency/precompile | 46183641505 ns | 46450283323 ns | 0.99 |
latency/ttfp | 7001382825 ns | 7028014177 ns | 1.00 |
latency/import | 3651822156 ns | 3667348189 ns | 1.00 |
integration/volumerhs | 9616637.5 ns | 9625836 ns | 1.00 |
integration/byval/slices=1 | 146680.5 ns | 146875 ns | 1.00 |
integration/byval/slices=3 | 425045 ns | 424860 ns | 1.00 |
integration/byval/reference | 144900 ns | 144959 ns | 1.00 |
integration/byval/slices=2 | 285974 ns | 285961 ns | 1.00 |
integration/cudadevrt | 103233 ns | 103332 ns | 1.00 |
kernel/indexing | 13949 ns | 14061 ns | 0.99 |
kernel/indexing_checked | 14499 ns | 14775 ns | 0.98 |
kernel/occupancy | 637.1046511627907 ns | 656.859649122807 ns | 0.97 |
kernel/launch | 2004.2 ns | 2090.2 ns | 0.96 |
kernel/rand | 14598 ns | 16643 ns | 0.88 |
array/reverse/1d | 19363 ns | 19602 ns | 0.99 |
array/reverse/2d | 24535 ns | 24553 ns | 1.00 |
array/reverse/1d_inplace | 10719 ns | 11159 ns | 0.96 |
array/reverse/2d_inplace | 12480 ns | 13010 ns | 0.96 |
array/copy | 21175 ns | 20672 ns | 1.02 |
array/iteration/findall/int | 157906 ns | 157732 ns | 1.00 |
array/iteration/findall/bool | 138756 ns | 138853.5 ns | 1.00 |
array/iteration/findfirst/int | 152794 ns | 153651.5 ns | 0.99 |
array/iteration/findfirst/bool | 154428 ns | 153926 ns | 1.00 |
array/iteration/scalar | 70496 ns | 72447 ns | 0.97 |
array/iteration/logical | 213423 ns | 206640.5 ns | 1.03 |
array/iteration/findmin/1d | 41018 ns | 40619 ns | 1.01 |
array/iteration/findmin/2d | 93431 ns | 93219 ns | 1.00 |
array/reductions/reduce/1d | 35842 ns | 34826 ns | 1.03 |
array/reductions/reduce/2d | 40715 ns | 50621 ns | 0.80 |
array/reductions/mapreduce/1d | 32909 ns | 32476 ns | 1.01 |
array/reductions/mapreduce/2d | 41241 ns | 50769 ns | 0.81 |
array/broadcast | 20512 ns | 20427 ns | 1.00 |
array/copyto!/gpu_to_gpu | 13698 ns | 11886 ns | 1.15 |
array/copyto!/cpu_to_gpu | 208479.5 ns | 207751.5 ns | 1.00 |
array/copyto!/gpu_to_cpu | 243344 ns | 245794 ns | 0.99 |
array/accumulate/1d | 108244 ns | 109015 ns | 0.99 |
array/accumulate/2d | 80382 ns | 79626 ns | 1.01 |
array/construct | 1302.7 ns | 1306 ns | 1.00 |
array/random/randn/Float32 | 43238 ns | 43298.5 ns | 1.00 |
array/random/randn!/Float32 | 26422 ns | 26052 ns | 1.01 |
array/random/rand!/Int64 | 26958 ns | 26998 ns | 1.00 |
array/random/rand!/Float32 | 8688.5 ns | 8602.333333333334 ns | 1.01 |
array/random/rand/Int64 | 29725 ns | 29780 ns | 1.00 |
array/random/rand/Float32 | 13025 ns | 12942 ns | 1.01 |
array/permutedims/4d | 61583 ns | 60894 ns | 1.01 |
array/permutedims/2d | 55519.5 ns | 55115 ns | 1.01 |
array/permutedims/3d | 55841.5 ns | 55898 ns | 1.00 |
array/sorting/1d | 2775794 ns | 2776458 ns | 1.00 |
array/sorting/by | 3367253 ns | 3369147.5 ns | 1.00 |
array/sorting/2d | 1084250 ns | 1084406 ns | 1.00 |
cuda/synchronization/stream/auto | 1004 ns | 1025.7 ns | 0.98 |
cuda/synchronization/stream/nonblocking | 6261.8 ns | 6461.2 ns | 0.97 |
cuda/synchronization/stream/blocking | 776.5849056603773 ns | 789.3663366336634 ns | 0.98 |
cuda/synchronization/context/auto | 1160.1 ns | 1164 ns | 1.00 |
cuda/synchronization/context/nonblocking | 6588 ns | 6604.6 ns | 1.00 |
cuda/synchronization/context/blocking | 903.7234042553191 ns | 889.4285714285714 ns | 1.02 |
This comment was automatically generated by workflow using github-action-benchmark.
Also added some more `@testset` blocks instead of `let` blocks.
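For illustration, here is that conversion in minimal form (the test body below is hypothetical, not taken from the diff): a `let` block only introduces a local scope, whereas a named `@testset` additionally groups its `@test`s under their own entry in the test summary.

```julia
using Test

# Before: a `let` block scopes locals, but failures are reported
# under the enclosing testset (or the top level).
let
    x = 1 + 1
    @test x == 2
end

# After: a named `@testset` gives the same tests their own entry
# and pass/fail counts in the summary.
@testset "basic arithmetic" begin
    x = 1 + 1
    @test x == 2
end
```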